import numpy as np
import os
import csv
import json
# Version suffix embedded in every .npy file name this script reads or writes.
version_num = 'd35'

# Load the train/test id arrays and the matching `hts` arrays; each row of an
# `hts` array is later scanned for its first entry equal to 1.
train_ids = np.load(f'E:\\data_pi\\train_ids_{version_num}.npy')
test_ids = np.load(f'E:\\data_pi\\test_ids_{version_num}.npy')
train_hts = np.load(f'E:\\data_pi\\train_hts_{version_num}.npy')
test_hts = np.load(f'E:\\data_pi\\test_hts_{version_num}.npy')
# Tag names used both to select dump files (substring match on the file name)
# and to translate a position in an `hts` row back into a tag name.
# ORDER MATTERS: later code indexes this list by position, so entries must not
# be reordered or removed.
# NOTE(review): 'interiordesign' appears twice (indices 5 and 9) -- confirm
# whether index 9 was meant to be a different tag before changing it.
collist = [
    'holidayvibes',
    'gamenight',
    'happyholidays',
    'carsoftiktok',
    'fallguysmoments',
    'interiordesign',
    'homecooked',
    'veteransday',
    'youwantmore',
    'interiordesign',
    'coldweather',
    'wildanimals',
    'mycostume',
    'meleaving',
    'mypfp',
    'catchphrases',
    'watchmegrow',
    'holidaycrafts',
    'growupwithme',
    'clingypet',
    'happyhanukkah',
    'lunarnewyear',
    'tabletop',
    'comfortfood',
    'selfimprovement',
    '2021affirmations',
    'perfectmatch',
    'givingszn',
    'holidaycountdown',
    'bakingszn',
    'holidaymusic',
    'familyimpression',
    'inkdrawing',
    'WeekendVibes',
    'recordsday',
    'productivity',
    'smallbusiness',
    'falldiy',
    'whenwewereyounger',
    'yellow',
    'ComingOfAge',
    'artmas',
    'gaminglife',
    'gamingsetup',
    'hellowinter',
    'planttiktok',
    'housetour',
    'neonshadow',
    'homeoffice',
    'raisedby',
    'makeitvogue',
    'foodtiktok',
    'valentinesday',
    'yougotthis',
    'stemlife',
]
# Build `ids`: record id -> {tag: dt}.
#
# For every tag in `collist`, every dump file whose name contains the tag is
# read as JSON lines (one JSON document per line). `dt` is the third
# underscore-separated field of the file name with '.json' removed
# (presumably a date stamp -- confirm against the dump naming scheme).
# Only the first `max_lines_per_file` lines of each file are considered, and
# the first `dt` seen for a given (record id, tag) pair is the one kept.
tiktok_dir = 'D:\\Work\\Tool\\tiktok\\TikToks\\'
max_lines_per_file = 2000

# The directory listing does not depend on the tag, so read it once instead
# of once per tag.
dump_files = os.listdir(tiktok_dir)

ids = {}
for col in collist:
    for file in dump_files:
        # Plain substring match: a tag that is a substring of another tag's
        # file name will also pick up that file.
        if col not in file:
            continue
        # Raises IndexError if the file name has fewer than three
        # underscore-separated fields.
        dt = file.split('_')[2].replace('.json', '')
        with open(tiktok_dir + file, 'r', encoding='utf-8',
                  newline='\n') as filename_input:
            for lc, line in enumerate(filename_input):
                # Enforce the cap before parsing so the line just past the
                # limit is never decoded.
                if lc >= max_lines_per_file:
                    break
                # A blank line is not a record; skip it rather than let the
                # JSON parser fail on it.
                if not line.strip():
                    continue
                z = json.loads(line)
                # Two record layouts are supported: the id either sits at the
                # top level or nested under 'itemInfos'.
                if 'id' in z:
                    k = z['id']
                elif 'itemInfos' in z:
                    k = z['itemInfos']['id']
                else:
                    # No id available: skip the record instead of filing it
                    # under an empty-string key.
                    continue
                # Keep the first dt recorded for this (id, tag) pair.
                ids.setdefault(k, {}).setdefault(col, dt)
def _first_hot_index(ht):
    """Return the position of the first entry of *ht* whose int value is 1.

    Raises:
        ValueError: if *ht* is empty or contains no entry equal to 1.
            Without this check such a row would silently be attributed to
            the last position (or to a stale index left over from the
            previous row when *ht* is empty).
    """
    for pos, flag in enumerate(ht):
        if int(flag) == 1:
            return pos
    raise ValueError('row has no entry equal to 1: %r' % (ht,))


def _lookup_dts(record_ids, hts):
    """Return, for each (record id, row) pair, the `dt` stored in `ids`.

    The tag is `collist[p]`, where `p` is the position of the first entry
    equal to 1 in the row. A KeyError is raised if the record id, or that
    tag for the record id, is missing from `ids`.
    """
    return [
        ids[record_id][collist[_first_hot_index(ht)]]
        for record_id, ht in zip(record_ids, hts)
    ]


train_dt = _lookup_dts(train_ids, train_hts)
test_dt = _lookup_dts(test_ids, test_hts)

# np.save appends '.npy' to a file name that does not already end with it.
np.save('E:\\data_pi\\train_dts_'+version_num, train_dt)
np.save('E:\\data_pi\\test_dts_'+version_num, test_dt)